/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.test.pigunit; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.lang.String; import java.util.HashMap; import java.util.Map; import java.util.Properties; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.pig.pigunit.Cluster; import org.apache.pig.pigunit.PigTest; import org.apache.pig.pigunit.pig.PigServer; import org.apache.pig.test.Util; import org.apache.pig.tools.parameters.ParseException; import org.apache.pig.impl.util.PropertiesUtil; import org.junit.BeforeClass; import org.junit.ComparisonFailure; import org.junit.Ignore; import org.junit.Test; /** * <p> * Various examples about how to use PigUnit. * * <p> * Requires in CLASSPATH: * <ul> * <li>pig.jar</li> * <li>pigunit.jar</li> * <li>$HADOOP_CONF_DIR to current/future cluster if not using LOCAL mode</li> * </ul> */ public class TestPigTest { private PigTest test; private static Cluster cluster; private static final String PIG_SCRIPT = "test/data/pigunit/top_queries.pig"; private static final String PIG_SCRIPT_MACRO = "test/data/pigunit/top_queries_macro.pig"; private static final Log LOG = LogFactory.getLog(TestPigTest.class); @BeforeClass public static void setUpOnce() throws Exception { System.getProperties().setProperty("pigunit.exectype", Util.getLocalTestMode().toString()); cluster = PigTest.getCluster(); cluster.update( new Path("test/data/pigunit/top_queries_input_data.txt"), new Path("top_queries_input_data.txt")); } @Test public void testNtoN() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] output = { "(yahoo,25)", "(facebook,15)", "(twitter,7)", }; test.assertOutput("queries_limit", output); } @Test public void testImplicitNtoN() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] output = { "(yahoo,25)", "(facebook,15)", "(twitter,7)", }; test.assertOutput(output); } @Test public void testTextInput() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] input = { "yahoo\t10", "twitter\t7", "facebook\t10", "yahoo\t15", "facebook\t5", "a\t1", "b\t2", "c\t3", "d\t4", "e\t5", }; String[] output = { "(yahoo,25)", "(facebook,15)", "(twitter,7)", }; test.assertOutput("data", input, "queries_limit", output); } @Test public void testDelimiter() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] input = { "yahoo,10", "twitter,7", "facebook,10", "yahoo,15", "facebook,5", "a,1", "b,2", "c,3", "d,4", "e,5", }; String[] output = { "(yahoo,25)", "(facebook,15)", "(twitter,7)", }; test.assertOutput("data", input, "queries_limit", output, ","); } @Test public void testSubset() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] input = { "yahoo\t10", "twitter\t7", "facebook\t10", "yahoo\t15", "facebook\t5", "a\t1", "b\t2", "c\t3", "d\t4", "e\t5", }; String[] output = { "(yahoo,25)", "(facebook,15)", "(twitter,7)", }; test.assertOutput("data", input, "queries_limit", output); } @Test public void testOverride() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); test.override("queries_limit", "queries_limit = LIMIT queries_ordered 2;"); String[] output = { "(yahoo,25)", "(facebook,15)", }; test.assertOutput(output); } @Test public void testInlinePigScript() throws ParseException, IOException { String[] script = { "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);", "queries_group = GROUP data BY query PARALLEL 1;", "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;", "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;", "queries_limit = LIMIT queries_ordered 3;", "STORE queries_limit INTO 'top_3_queries';", }; test = new PigTest(script); String[] output = { "(yahoo,25)", "(facebook,15)", "(twitter,7)", }; test.assertOutput(output); } @Test public void testFileOutput() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); test.assertOutput(new File("test/data/pigunit/top_queries_expected_top_3.txt")); } @Test public void testArgFiles() throws ParseException, IOException { String[] argsFile = { "test/data/pigunit/top_queries_params.txt" }; test = new PigTest(PIG_SCRIPT, null, argsFile); test.assertOutput(new File("test/data/pigunit/top_queries_expected_top_3.txt")); } @Test public void testGetLastAlias() throws ParseException, IOException { String[] script = { "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);", "queries_group = GROUP data BY query PARALLEL 1;", "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;", "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;", "queries_limit = LIMIT queries_ordered 3;", "STORE queries_limit INTO 'top_3_queries';", }; test = new PigTest(script); String expected = "(yahoo,25)\n" + "(facebook,15)\n" + "(twitter,7)"; assertEquals(expected, StringUtils.join(test.getAlias("queries_limit"), "\n")); } @Test public void testWithUdf() throws ParseException, IOException { String[] script = { // "REGISTER myIfNeeded.jar;", "DEFINE TOKENIZE TOKENIZE();", "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);", "queries = FOREACH data GENERATE query, TOKENIZE(query) AS query_tokens;", "queries_ordered = ORDER queries BY query DESC PARALLEL 1;", "queries_limit = LIMIT queries_ordered 3;", "STORE queries_limit INTO 'top_3_queries';", }; test = new PigTest(script); String[] output = { "(yahoo,{(yahoo)})", "(yahoo,{(yahoo)})", "(twitter,{(twitter)})", }; test.assertOutput(output); } @Test public void testStore() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); // By default PigUnit removes all the STORE and DUMP test.unoverride("STORE"); test.runScript(); assertTrue(cluster.delete(new Path("top_3_queries"))); } @Test // Script should only be registered once, otherwise Pig will complain // Macro defined twice. See PIG-3114 public void testMacro() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT_MACRO, args); // By default PigUnit removes all the STORE and DUMP test.unoverride("STORE"); test.runScript(); assertTrue(cluster.delete(new Path("top_3_queries"))); } @Ignore("Not ready yet") @Test public void testWithMock() throws ParseException, IOException { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; PigServer mockServer = null; Cluster mockCluster = null; test = new PigTest(PIG_SCRIPT, args, mockServer, mockCluster); test.assertOutput(new File("data/top_queries_expected_top_3.txt")); } /** * This is a test for default bootup. PIG-2456 * @throws Exception */ @Test public void testDefaultBootup() throws Exception { // Test with properties object Properties pigProps = PropertiesUtil.loadDefaultProperties(); String bootupPath = "/tmp/.temppigbootup"; pigProps.setProperty("pig.load.default.statements", bootupPath); File bootupFile = new File(bootupPath); PrintWriter out = new PrintWriter(new FileWriter(bootupFile)); out.println("data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);"); out.close(); String[] script = { // The following line is commented as the test creates a bootup file which // contains it instead. PigTests (and Pig scripts in general) will read the // bootup file to load default statements // "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);", "queries_group = GROUP data BY query PARALLEL 1;", "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;", "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;", "queries_limit = LIMIT queries_ordered 3;", "STORE queries_limit INTO 'top_3_queries';", }; String scriptPath = "/tmp/tempScript"; File scriptFile = new File(scriptPath); out = new PrintWriter(new FileWriter(scriptFile)); for (String line : script) { out.println(line); } out.close(); String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; // Create a pigunit.pig.PigServer and Cluster to run this test. PigServer pig = null; pig = new PigServer(Util.getLocalTestMode(), pigProps); final Cluster cluster = new Cluster(pig.getPigContext()); test = new PigTest(scriptPath, args, pig, cluster); String[] output = { "(yahoo,25)", "(facebook,15)", "(twitter,7)", }; test.assertOutput("queries_limit", output); scriptFile.delete(); bootupFile.delete(); } @Test public void testMockedAliasWithDefaultDelimiter() throws Exception { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args, null, null); String[] mockData = { "Apache\t99", "Pig\t42", "GitHub\t107", "Google\t404" }; test.mockAlias("queries_ordered", mockData,"(query: chararray,count: int)"); String[] expectedOutput = { "(Apache,99)", "(Pig,42)", "(GitHub,107)" }; test.assertOutput(expectedOutput); } @Test public void testMockedAliasWithDifferentDelimiter() throws Exception { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args, null, null); String[] mockData = { "Apache,99", "Pig,42", "GitHub,107", "Google,404" }; test.mockAlias("queries_ordered", mockData,"(query: chararray,count: int)", ","); String[] expectedOutput = { "(Apache,99)", "(Pig,42)", "(GitHub,107)" }; test.assertOutput(expectedOutput); } @Test public void testAliasSchemaMap() throws Exception { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args, null, null); final Map<String, String> expected = new HashMap<String, String>(); expected.put("data", "(query: chararray,count: int)"); expected.put("queries_group", "(group: chararray,data: {(query: chararray,count: int)})"); expected.put("queries_sum", "(query: chararray,count: long)"); expected.put("queries_ordered", "(query: chararray,count: long)"); expected.put("queries_limit", "(query: chararray,count: long)"); Map<String, String> map = test.getAliasToSchemaMap(); assertEquals(expected, map); } @Test public void testAnyOrderOutput() throws Exception { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] reorderedExpectedOutput = { "(twitter,7)", "(yahoo,25)", "(facebook,15)" }; test.assertOutputAnyOrder(reorderedExpectedOutput); } @Test public void testAnyOrderOutputForAlias() throws Exception { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] reorderedExpectedOutput = { "(twitter,7)", "(yahoo,25)", "(facebook,15)" }; test.assertOutputAnyOrder("queries_limit", reorderedExpectedOutput); } @Test public void testSpecificOrderOutput() throws Exception { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] reorderedExpectedOutput = { "(twitter,7)", "(yahoo,25)", "(facebook,15)" }; try { test.assertOutput(reorderedExpectedOutput); fail("assertOutput should fail when the records are unordered."); } catch(ComparisonFailure e) { //Test passes } } @Test public void testSpecificOrderOutputForAlias() throws Exception { String[] args = { "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries", }; test = new PigTest(PIG_SCRIPT, args); String[] reorderedExpectedOutput = { "(twitter,7)", "(yahoo,25)", "(facebook,15)" }; try { test.assertOutput("queries_limit", reorderedExpectedOutput); fail("assertOutput should fail when the records are unordered."); } catch(ComparisonFailure e) { //Test passes } } }